/*==============================================================================
FILE NAME: Figure_C5.do
CREATED: 25 July 2025
==============================================================================*/


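/* Set directories here if running this do-file on its own (i.e. not from the
   replication package's master script). Define the global paths used below:
if c(username)=="" { // insert username
	global rootdir "" // insert root path
	global processed_data "$rootdir/processed_data"
	global figures "$rootdir/output/figures"
	global point_estimates "" // insert folder for the point-estimate CSV exported at the end of this file
}
*/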

**Figure C5
* Pie chart of complaints by media category (air / water / waste / multiple /
* missing) for complaints received between January 2003 and December 2019.
* Requires the globals $processed_data and $figures to be defined beforehand.
set scheme modern
use "$processed_data/incidents.dta", clear
rename ComplaintIncident CIN
drop if IncidentStatus=="REFERRED"

* Convert the received date from an MDY string to a Stata daily date.
gen temp = date(IncidentRecDate,"MDY")
drop IncidentRecDate
rename temp IncidentRecDate
format IncidentRecDate %td
* Negative %td values are dates before 01jan1960; treat them as invalid.
replace IncidentRecDate = . if IncidentRecDate < 0
gen year=year(IncidentRecDate)
gen month=month(IncidentRecDate)
gen mdate=ym(year,month)
format mdate %tm
//restrict to 2003 to 2019 (observations with a missing date are dropped too,
//because missing compares greater than any number)
keep if mdate>=tm(2003m1) & mdate<=tm(2019m12)

* Reduce to one row per distinct (complaint, media) pair.
keep CIN Media
duplicates drop

* Count the distinct media values per complaint and keep a single row per
* complaint. Sorting on Media within CIN makes the retained row deterministic
* (a plain -sort CIN- orders ties randomly, so the result could differ between
* runs). Empty strings sort first, so the retained Media is blank whenever the
* complaint has any blank Media row. Counting rows also avoids relying on
* Media2/Media3 existing, which a wide reshape only guarantees when some
* complaint has that many distinct media.
bysort CIN (Media): gen n_media=_N
by CIN: keep if _n==1
isid CIN

gen media_cat="Air only" if Media=="AIR" & n_media==1
replace media_cat="Water only" if Media=="WATER" & n_media==1
replace media_cat="Waste only" if Media=="WASTE" & n_media==1
replace media_cat="Multiple media" if n_media>1
* NOTE(review): as in the original ordering of these rules, a blank Media row
* takes precedence over "Multiple media" -- confirm this is intended for
* complaints that have both a blank and a non-blank Media value.
replace media_cat="Type missing" if Media==""
//drop if media_cat=="Multiple media"

graph set window fontface "Times New Roman"
* No variable is given to -graph pie- so that slices are the NUMBER of
* complaints per category; -graph pie CIN, over()- would instead size slices
* by the sum of the complaint ID values.
graph pie, over(media_cat) sort descending ///
legend(position(3) row(5) size(large)) ///
ysize(6) xsize(10)


graph export "$figures/Figure_C5.pdf", replace

// Export the complaint counts and percentage shares by media category.
// The in-memory data are preserved and restored around the collapse.
preserve

// One row per media category holding the number of non-missing CINs
collapse (count) count = CIN, by(media_cat)

// Grand total across categories, then each category's share in percent
summarize count, meanonly
scalar total = r(sum)
generate percent = (count / scalar(total)) * 100

// Display format and column order for the exported table
format percent %6.2f
order media_cat count percent

// Write the table to CSV
export delimited using "$point_estimates/Point_Estimates_Figure_C5.csv", replace

restore

